In [1]:
%%capture
import classify
import sentiment
import lime
from sklearn.pipeline import make_pipeline
import sklearn
import numpy as np
import random
from lime.lime_text import LimeTextExplainer
import eli5
import plot_confusion_matrix
from eli5.lime import TextExplainer
import pandas as pd
import matplotlib.pyplot as plt
import string
In [2]:
def scramble(sentence):
   """Return `sentence` with its whitespace-separated words in a random order.

   Used below to probe how sensitive the classifiers are to word order.
   """
   words = sentence.split()
   random.shuffle(words)  # in-place shuffle of the word list
   return ' '.join(words)

1 Classifier Explanations

In [3]:
# Load the sentiment data and train the classifier (see logged output below:
# TF-IDF 1-3 grams + LogisticRegression(C=100)).
unlabeled, sent, cls = sentiment.main()
vectorizer = sent.count_vect  # vectorizer fitted on the training split
c = make_pipeline(vectorizer, cls)  # raw text in -> prediction out
class_names = ['NEGATIVE', 'POSITIVE']  # label 0 / label 1
Reading data
-- train data
sentiment/train.tsv
4582
-- dev data
sentiment/dev.tsv
458
-- transforming data and labels

Training classifier
Best C for LOGREG = 100

Evaluating
  Accuracy on train  is: 1.0
  Accuracy on dev  is: 0.7947598253275109
sentiment/unlabeled.tsv

Returning unlabeled data, vectorizer and logistic classifier

Brief Description

  • Vectorizer: TfidfVectorizer(ngram_range=(1,3))
  • Classifier: LogisticRegression(C=100, random_state=0, solver='lbfgs', max_iter=10000)
In [4]:
# One coefficient per vocabulary n-gram == number of features.
print("Number of features: " + str(len(cls.coef_[0])))
Number of features: 167716

Top Features

In [5]:
# Global explanation: highest/lowest weighted n-grams of the logistic model.
# NOTE(review): get_feature_names() was removed in scikit-learn >= 1.2; newer
# versions need get_feature_names_out().
eli5.show_weights(cls,feature_names=sent.count_vect.get_feature_names(),target_names=class_names)
Out[5]:

y=POSITIVE top features

Weight? Feature
+20.734 great
+13.540 amazing
+13.139 love
+12.064 delicious
+11.670 excellent
+11.326 best
+10.666 awesome
+9.758 and
+9.535 the best
+8.692 friendly
+7.152 good
… 83048 more positive …
… 84649 more negative …
-6.921 disappointing
-7.286 went
-8.731 bad
-8.877 the worst
-8.889 rude
-9.225 terrible
-12.972 worst
-13.067 horrible
-15.124 not

Bias

In [108]:
# Bias check: an empty input has no active features, so the prediction is
# driven entirely by the model's intercept.
print("Result for an empty input:\n")
empty_pred = c.predict([""])[0]
print("Prediction: " + class_names[empty_pred])
# .item() extracts the scalar margin; calling float() on a 1-element ndarray
# is deprecated since NumPy 1.25.
print("Confidence: " + str(c.decision_function([""]).item()))
print("Probability: " + str(c.predict_proba([""])[0, empty_pred]))
Result for an empty input:

Prediction: NEGATIVE
Confidence: -0.23866530205298517
Probability: 0.5593847077717795

Confusion Matrix

In [6]:
# Dev-set confusion matrix (rows = true labels, columns = predictions).
plot_confusion_matrix.plot_confusion_matrix(sent.devy, cls.predict(sent.devX),classes=np.array(class_names))
Confusion matrix, without normalization
[[181  48]
 [ 46 183]]
Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a25565438>

1.1 My Favorite Review

In [7]:
# Hand-picked unlabeled review; report the model's verdict, margin and
# probability for it.
fav_review = unlabeled.data[643]
print("My favorite review:")
print(fav_review + '\n')
pred = c.predict([fav_review])[0]
print("Prediction: " + class_names[pred])
# .item() extracts the scalar margin; float() on a 1-element ndarray is
# deprecated since NumPy 1.25.
print("Confidence: " + str(c.decision_function([fav_review]).item()))
print("Probability: " + str(c.predict_proba([fav_review])[0, pred]))
My favorite review:
Stay Away from here!!  Don't eat Here!!I wish they had a Zero Star rating!!! Horrible, Horrible!! Our party of four ordered pizza and an order of fries to be

Prediction: NEGATIVE
Confidence: -6.346471584031029
Probability: 0.9982501460833488

Most Impactful Features

In [8]:
# Per-feature contribution to the decision: classifier weight * tf-idf value,
# restricted to the features present in this review.
a = sent.count_vect.transform([fav_review]).T.toarray()  # dense (n_features, 1) column
# Names of the non-zero (present) features.
r1 = (np.array(sent.count_vect.get_feature_names()).T[(a != 0).reshape(-1,)])
# Matching classifier weights for those present features.
r2 = cls.coef_[a.T != 0]
product = (r2 * (a[a != 0]))  # contribution of each present feature to the margin
min5 = product.argsort()[:5]   # 5 most negative contributions
max5 = product.argsort()[-5:]  # 5 most positive contributions
plt.bar(r1[max5],product[max5])
plt.title('Most Positive Features')
plt.show()
In [9]:
# NOTE(review): r1/min5/product come from the previous cell (cross-cell state).
plt.bar(r1[min5],product[min5])
plt.title('Most Negative Features')
plt.show()

Lime Text Explainer

In [10]:
# LIME: perturb the text and fit a local linear surrogate around the
# prediction; bow=False keeps each word occurrence position-specific.
explainer = LimeTextExplainer(class_names=class_names,bow=False)
exp = explainer.explain_instance(fav_review, c.predict_proba, num_features=15)
exp.show_in_notebook(text=True)

Scrambled

In [11]:
# Re-explain the same review with word order destroyed, to see how much the
# 1-3 gram model relies on ordering.
scrambled = scramble(fav_review)
scrambled_exp = explainer.explain_instance(scrambled, c.predict_proba, num_features=15)
scrambled_exp.show_in_notebook(text=True)

Without the Two Most Important Words

In [12]:
# Remove the two words LIME ranked most important.
# NOTE(review): str.replace removes every occurrence, including substrings
# inside other words.
top2words = (np.array(exp.as_list())[:2])[:,0]
top2words = top2words.tolist()
wo_most_impact1 = fav_review.replace(top2words[0],'').replace(top2words[1],'')
In [13]:
# Explain the ablated review to check whether the prediction weakens or flips.
wo_exp = explainer.explain_instance(wo_most_impact1, c.predict_proba, num_features=15)
wo_exp.show_in_notebook(text=True)

1.2 Overconfident Review

In [50]:
# A review the model is confidently wrong about (negative prediction for a
# clearly positive review, per the output below).
oc_review = unlabeled.data[70204]
print("An overconfident review:")
print(oc_review + '\n')
oc_pred = c.predict([oc_review])[0]
print("Prediction: " + class_names[oc_pred])
# .item() extracts the scalar margin; float() on a 1-element ndarray is
# deprecated since NumPy 1.25.
print("Confidence: " + str(c.decision_function([oc_review]).item()))
print("Probability: " + str(c.predict_proba([oc_review])[0, oc_pred]))
An overconfident review:
Well we really weren't sure how you could get full having a fondue meal but I have to say we left full and satisfied.  Our server, Jose went above

Prediction: NEGATIVE
Confidence: -3.118150813356258
Probability: 0.9576352701855256

Most Impactful Features

In [15]:
# Weight * tf-idf contribution analysis, as for the favorite review above.
# NOTE(review): this cell is copy-pasted several times in the notebook; a
# shared helper function would avoid drift.
a = sent.count_vect.transform([oc_review]).T.toarray()  # dense (n_features, 1) column
r1 = (np.array(sent.count_vect.get_feature_names()).T[(a != 0).reshape(-1,)])  # present feature names
r2 = cls.coef_[a.T != 0]  # weights of the present features
product = (r2 * (a[a != 0]))  # per-feature contribution to the margin
min5 = product.argsort()[:5]   # 5 most negative
max5 = product.argsort()[-5:]  # 5 most positive
plt.bar(r1[max5],product[max5])
plt.title('Most Positive Features')
plt.show()
In [16]:
# NOTE(review): r1/min5/product come from the previous cell (cross-cell state).
plt.bar(r1[min5],product[min5])
plt.title('Most Negative Features')
plt.show()

Lime Text Explainer

In [17]:
# Local LIME explanation for the overconfident review.
oc_exp = explainer.explain_instance(oc_review, c.predict_proba, num_features=15)
oc_exp.show_in_notebook(text=True)

Scrambled

In [18]:
# Same review with word order destroyed.
scrambled = scramble(oc_review)
scrambled_exp = explainer.explain_instance(scrambled, c.predict_proba, num_features=15)
scrambled_exp.show_in_notebook(text=True)

Without the Two Most Important Words

In [19]:
# Drop the two words LIME ranked most important (replace removes every
# occurrence, including substrings of other words).
top2words = (np.array(oc_exp.as_list())[:2])[:,0]
top2words = top2words.tolist()
wo_most_impact2 = oc_review.replace(top2words[0],'').replace(top2words[1],'')
In [20]:
# Explain the ablated review.
wo_exp2 = explainer.explain_instance(wo_most_impact2, c.predict_proba, num_features=15)
wo_exp2.show_in_notebook(text=True)

1.3 Input Your Own Text!

In [145]:
# Free-form input: classify an arbitrary sentence with the sentiment pipeline.
test_input = "The food at the restaurant was pretty good but the service was a terrible"
print("Test Input:")
print(test_input + '\n')
ti_pred = c.predict([test_input])[0]
print("Prediction: " + class_names[ti_pred])
# .item() extracts the scalar margin; float() on a 1-element ndarray is
# deprecated since NumPy 1.25.
print("Confidence: " + str(c.decision_function([test_input]).item()))
print("Probability: " + str(c.predict_proba([test_input])[0, ti_pred]))
Test Input:
The food at the restaurant was pretty good but the service was a terrible

Prediction: NEGATIVE
Confidence: -7.5141404414923985
Probability: 0.999454978661801

Most Impactful Features

In [146]:
# Weight * tf-idf contribution analysis for the custom input (same
# copy-pasted pattern as the cells above).
a = sent.count_vect.transform([test_input]).T.toarray()  # dense (n_features, 1) column
r1 = (np.array(sent.count_vect.get_feature_names()).T[(a != 0).reshape(-1,)])  # present feature names
r2 = cls.coef_[a.T != 0]  # weights of the present features
product = (r2 * (a[a != 0]))  # per-feature contribution to the margin
min5 = product.argsort()[:5]   # 5 most negative
max5 = product.argsort()[-5:]  # 5 most positive
plt.bar(r1[max5],product[max5])
plt.title('Most Positive Features')
plt.show()
In [147]:
# NOTE(review): r1/min5/product come from the previous cell (cross-cell state).
plt.bar(r1[min5],product[min5])
plt.title('Most Negative Features')
plt.show()

Lime Text Explainer

In [143]:
# Local LIME explanation for the custom input.
ti_exp = explainer.explain_instance(test_input, c.predict_proba, num_features=15)
ti_exp.show_in_notebook(text=True)

Scrambled

In [144]:
# Custom input with word order destroyed.
scrambled = scramble(test_input)
ti_scrambled_exp = explainer.explain_instance(scrambled, c.predict_proba, num_features=15)
ti_scrambled_exp.show_in_notebook(text=True)

Without the Two Most Important Words

In [76]:
# Drop the two words LIME ranked most important (replace removes every
# occurrence, including substrings of other words).
top2words = (np.array(ti_exp.as_list())[:2])[:,0]
top2words = top2words.tolist()
wo_most_impact3 = test_input.replace(top2words[0],'').replace(top2words[1],'')
In [77]:
# Explain the ablated input.
wo_exp3 = explainer.explain_instance(wo_most_impact3, c.predict_proba, num_features=15)
wo_exp3.show_in_notebook(text=True)

2 Second Classification Task: Hate Speech Detection

In recent years, there has been constant outrage over Twitter's banning of users who tweet inflammatory statements. There is a need to balance between creating a safe space for everyone and allowing freedom of speech to thrive on social media. A good hate speech detector is crucial for this. In this classification task, I use a TF-IDF vectorizer along with a multi-class logistic regression classifier to classify tweets as one of the three categories: hate speech, offensive language, or neither.

2.1 Brief Descriptions

In [29]:
# Load the labeled tweet dataset and keep only the class label and tweet text
# (drop the bookkeeping/annotator-count columns).
hate_speech = pd.read_csv('labeled_data.csv')
del hate_speech['index']
del hate_speech['count']
del hate_speech['hate_speech']
del hate_speech['offensive_language']
del hate_speech['neither']
hs_classes = ['hate speech', 'offensive language', 'neither']  # class 0/1/2
# Seed the RNG so the random 80/20 train/test split is reproducible across
# full re-runs of the notebook.
np.random.seed(0)
msk = np.random.rand(len(hate_speech)) < 0.8
train = hate_speech[msk]
test = hate_speech[~msk]
hate_speech.head()
Out[29]:
class tweet
0 2 !!! RT @mayasolovely: As a woman you shouldn't...
1 1 !!!!! RT @mleew17: boy dats cold...tyga dwn ba...
2 1 !!!!!!! RT @UrKindOfBrand Dawg!!!! RT @80sbaby...
3 1 !!!!!!!!! RT @C_G_Anderson: @viva_based she lo...
4 1 !!!!!!!!!!!!! RT @ShenikaRoberts: The shit you...

Train

In [123]:
# Train a TF-IDF (1-3 gram) + one-vs-rest logistic regression pipeline on the
# raw tweet text.
# NOTE(review): the multi_class parameter is deprecated in scikit-learn >= 1.5.
vec = sklearn.feature_extraction.text.TfidfVectorizer(ngram_range=(1,3))
clf2 = sklearn.linear_model.LogisticRegression(solver='lbfgs',multi_class='ovr',max_iter=200)
c1 = make_pipeline(vec,clf2)
c1.fit(train['tweet'],train['class'])
print("Number of features: " + str(len(clf2.coef_[0])))
Number of features: 384952

Confusion Matrix

In [124]:
# Held-out accuracy and confusion matrix for the 3-way tweet classifier.
print("Test Accuracy: ",c1.score(test['tweet'],test['class']),'\n')
plot_confusion_matrix.plot_confusion_matrix(test['class'], c1.predict(test['tweet']),classes=np.array(hs_classes))
Test Accuracy:  0.8555941910411127 

Confusion matrix, without normalization
[[  23  231   20]
 [  17 3662   62]
 [   0  376  498]]
Out[124]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a28c525c0>

Top Features

In [125]:
# Strongest positive/negative features per class.
# NOTE(review): get_feature_names() was removed in scikit-learn >= 1.2.
eli5.show_weights(c1,feature_names=vec.get_feature_names(),target_names=hs_classes)
Out[125]:
y=hate speech top features y=offensive language top features y=neither top features
Weight? Feature
+7.980 faggot
+5.936 nigger
+4.873 white
+4.829 fag
+4.250 faggots
+3.747 niggers
+3.501 fags
+3.344 white trash
+3.066 nigga
+2.718 fucking
+2.497 hate
+2.430 queer
+2.279 kill
+2.182 niggas
… 28139 more positive …
… 356794 more negative …
-2.599 hoe
-2.691 hoes
-2.702 <BIAS>
-2.988 pussy
-3.805 bitches
-6.084 bitch
Weight? Feature
+20.208 bitch
+12.948 bitches
+10.681 pussy
+9.891 hoes
+8.838 hoe
+5.255 shit
+3.618 her
+3.048 these
+3.009 ain
+2.995 my
+2.871 ass
… 277441 more positive …
… 107492 more negative …
-3.486 brownies
-4.031 colored
-4.306 monkey
-4.472 birds
-5.687 yellow
-6.000 charlie
-6.429 bird
-6.586 yankees
-8.700 trash
Weight? Feature
+9.415 trash
+7.820 bird
+7.576 yankees
+7.063 charlie
+6.606 yellow
+5.421 birds
+4.418 colored
+4.339 monkey
… 84433 more positive …
… 300500 more negative …
-4.146 fucking
-4.220 faggot
-4.457 nigga
-4.686 you
-5.325 fuck
-5.533 shit
-5.548 ass
-7.922 hoe
-9.041 hoes
-9.580 pussy
-11.546 bitches
-18.731 bitch

Bias

In [128]:
# Bias check: an empty tweet has no active features, so the prediction
# reflects the per-class intercepts alone.
print("Result for an empty input:\n")
empty_pred = c1.predict([""])[0]
print("Prediction: " + hs_classes[empty_pred])
print("Probability: " + str(c1.predict_proba([""])[0,empty_pred]))
Result for an empty input:

Prediction: offensive language
Probability: 0.675912379153758

2.2 My Favorite Instance

In [129]:
# Hand-picked test tweet: compare the prediction to its gold label.
# NOTE(review): 5883 is the original DataFrame index label, not a position.
fav_instance = test['tweet'][5883]
print("My Favorite Instance:")
print(fav_instance + '\n')
fav_pred = c1.predict([fav_instance])[0]
print("Prediction: " + hs_classes[fav_pred])
print("True Label: " + hs_classes[test['class'][5883]])
print("Confidence: " + str(c1.predict_proba([fav_instance])[0,fav_pred]))
My Favorite Instance:
@drboycewatkins1 Coons and monkeys like you disgust me, you minstrel show negro. Crying about shit all the time you're a disgrace everywhere

Prediction: offensive language
True Label: hate speech
Confidence: 0.8027090363700423

Most Impactful Features

In [130]:
# Per-feature contribution (weight * tf-idf) toward class 0 ('hate speech'),
# restricted to the features present in this tweet.
a = vec.transform([fav_instance]).T.toarray()  # dense (n_features, 1) column
r1 = (np.array(vec.get_feature_names()).T[(a != 0).reshape(-1,)])  # present feature names
r2 = (clf2.coef_[0].reshape(1,-1))[a.T != 0]  # class-0 weights of present features
product = (r2 * (a[a != 0]))  # contribution of each present feature
min5 = product.argsort()[:5]   # 5 most negative
max5 = product.argsort()[-5:]  # 5 most positive
idx_hs = np.concatenate((min5,max5))  # plot both extremes in one bar chart
plt.bar(r1[idx_hs],product[idx_hs])
plt.title('Most Impactful Features for y = hate speech')
plt.show()
In [131]:
# Same contribution analysis toward class 1 ('offensive language');
# `a` carries over from the previous cell.
r1 = (np.array(vec.get_feature_names()).T[(a != 0).reshape(-1,)])
r2 = (clf2.coef_[1].reshape(1,-1))[a.T != 0]  # class-1 weights
product = (r2 * (a[a != 0]))
min5 = product.argsort()[:5]
max5 = product.argsort()[-5:]
idx_hs = np.concatenate((min5,max5))
plt.bar(r1[idx_hs],product[idx_hs])
plt.title('Most Impactful Features for y = offensive language')
plt.show()
In [132]:
# Same contribution analysis toward class 2 ('neither');
# `a` carries over from the earlier cell.
r1 = (np.array(vec.get_feature_names()).T[(a != 0).reshape(-1,)])
r2 = (clf2.coef_[2].reshape(1,-1))[a.T != 0]  # class-2 weights
product = (r2 * (a[a != 0]))
min5 = product.argsort()[:5]
max5 = product.argsort()[-5:]
idx_hs = np.concatenate((min5,max5))
plt.bar(r1[idx_hs],product[idx_hs])
plt.title('Most Impactful Features for y = neither')
plt.show()

Lime Text Explainer

In [148]:
# LIME explainer for the 3-class tweet model; bow=False keeps word positions
# distinct.
hs_explainer = LimeTextExplainer(class_names=hs_classes,bow=False)
fav_exp = hs_explainer.explain_instance(fav_instance, c1.predict_proba, num_features=15)
fav_exp.show_in_notebook(text=True)

Scrambled

In [134]:
# Same tweet with word order destroyed.
scrambled = scramble(fav_instance)
scrambled_exp = hs_explainer.explain_instance(scrambled, c1.predict_proba, num_features=15)
scrambled_exp.show_in_notebook(text=True)

Without the Two Most Important Words

In [135]:
# Drop the two words LIME ranked most important (replace removes every
# occurrence, including substrings of other words).
top2words = (np.array(fav_exp.as_list())[:2])[:,0]
top2words = top2words.tolist()
wo_most_impact_hs = fav_instance.replace(top2words[0],'').replace(top2words[1],'')
In [136]:
# Explain the ablated tweet.
wo_exp_hs = hs_explainer.explain_instance(wo_most_impact_hs, c1.predict_proba, num_features=15)
wo_exp_hs.show_in_notebook(text=True)

2.3 Input Your Own Text!

In [137]:
# Free-form input: classify an arbitrary (benign) sentence with the tweet
# model.
test_input2 = "I’ve been taught binary logistic regression using the sigmoid function, and multi-class logistic regression using a softmax."
print("Test Input:")
print(test_input2 + '\n')
ti_pred2 = c1.predict([test_input2])[0]
print("Prediction: " + hs_classes[ti_pred2])
print("Confidence: " + str(c1.predict_proba([test_input2])[0,ti_pred2]))
Test Input:
I’ve been taught binary logistic regression using the sigmoid function, and multi-class logistic regression using a softmax.

Prediction: offensive language
Confidence: 0.6262388319097867

Most Impactful Features

In [138]:
# Per-feature contribution (weight * tf-idf) toward class 0 ('hate speech')
# for the custom input.
a = vec.transform([test_input2]).T.toarray()  # dense (n_features, 1) column
r1 = (np.array(vec.get_feature_names()).T[(a != 0).reshape(-1,)])  # present feature names
r2 = (clf2.coef_[0].reshape(1,-1))[a.T != 0]  # class-0 weights
product = (r2 * (a[a != 0]))
min5 = product.argsort()[:5]   # 5 most negative
max5 = product.argsort()[-5:]  # 5 most positive
idx_hs = np.concatenate((min5,max5))
plt.bar(r1[idx_hs],product[idx_hs])
plt.title('Most Impactful Features for y = hate speech')
plt.show()
In [139]:
# Same analysis toward class 1 ('offensive language'); `a` carries over from
# the previous cell.
r1 = (np.array(vec.get_feature_names()).T[(a != 0).reshape(-1,)])
r2 = (clf2.coef_[1].reshape(1,-1))[a.T != 0]  # class-1 weights
product = (r2 * (a[a != 0]))
min5 = product.argsort()[:5]
max5 = product.argsort()[-5:]
idx_hs = np.concatenate((min5,max5))
plt.bar(r1[idx_hs],product[idx_hs])
plt.title('Most Impactful Features for y = offensive language')
plt.show()
In [62]:
# Same analysis toward class 2 ('neither'); `a` carries over from the earlier
# cell.
r1 = (np.array(vec.get_feature_names()).T[(a != 0).reshape(-1,)])
r2 = (clf2.coef_[2].reshape(1,-1))[a.T != 0]  # class-2 weights
product = (r2 * (a[a != 0]))
min5 = product.argsort()[:5]
max5 = product.argsort()[-5:]
idx_hs = np.concatenate((min5,max5))
plt.bar(r1[idx_hs],product[idx_hs])
plt.title('Most Impactful Features for y = neither')
plt.show()

Lime Text Explainer

In [101]:
# Local LIME explanation for the custom input.
ti_exp2 = hs_explainer.explain_instance(test_input2, c1.predict_proba, num_features=15)
ti_exp2.show_in_notebook(text=True)

Scrambled

In [64]:
# Custom input with word order destroyed.
scrambled = scramble(test_input2)
scrambled_exp = hs_explainer.explain_instance(scrambled, c1.predict_proba, num_features=15)
scrambled_exp.show_in_notebook(text=True)

Without the Two Most Important Words

In [65]:
# Drop the two words LIME ranked most important (replace removes every
# occurrence, including substrings of other words).
top2words = (np.array(ti_exp2.as_list())[:2])[:,0]
top2words = top2words.tolist()
wo_most_impact_hs2 = test_input2.replace(top2words[0],'').replace(top2words[1],'')
In [66]:
# Explain the ablated input.
wo_exp_hs2 = hs_explainer.explain_instance(wo_most_impact_hs2, c1.predict_proba, num_features=15)
wo_exp_hs2.show_in_notebook(text=True)